import requests
import re
# Page to scrape.
url = 'http://www.weather.com.cn/weather1d/101210401.shtml'
# Send the GET request. Without a timeout, requests waits indefinitely on an
# unresponsive server, so bound the wait explicitly.
resp = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page into
# empty result lists further down.
resp.raise_for_status()
# Force UTF-8 decoding of the body so the Chinese text is read correctly
# regardless of what the response headers advertise.
resp.encoding = 'utf-8'
#print(resp.text)    # 爬取整个html的文本

# Sample of the markup the patterns below are written against:
#   <span class="name">三亚</span>
#   <span class="weather">多云转雷阵雨</span>
#   <span class="wd">34/26℃</span>
#   <span class="zs">较适宜</span>

# Read the decoded body once; resp.text is reused by every pattern below.
page_html = resp.text

# name / weather / zs capture runs of CJK characters only (U+4E00..U+9FA5).
city = re.findall(r'<span class="name">([\u4e00-\u9fa5]*)</span>', page_html)
weather = re.findall(r'<span class="weather">([\u4e00-\u9fa5]*)</span>', page_html)
# The wd value mixes digits and symbols, so it is matched with a wildcard.
# It must be non-greedy: a greedy `.*` runs to the LAST `</span>` on the
# line, swallowing neighbouring spans when several share one line.
wd = re.findall(r'<span class="wd">(.*?)</span>', page_html)
zs = re.findall(r'<span class="zs">([\u4e00-\u9fa5]*)</span>', page_html)
# print(city)
# print(weather)
# print(wd)
# print(zs)

# Merge the four parallel result lists into one [name, weather, wd, zs] row
# per entry; zip stops at the shortest list.
lst = [list(fields) for fields in zip(city, weather, wd, zs)]

# Print each combined row on its own line.
for row in lst:
    print(row)